% Conference paper in the JCDL '07 proceedings (publisher: ACM).
% NOTE(review): `internal-note` carries the value that used to be in `note`
% (00017). It looks like an export-tool artefact rather than a bibliographic
% note, so it was moved to an ignored field to keep it out of the rendered
% bibliography -- confirm and delete if it is not needed.
@inproceedings{stewart_new_2007,
	author = {Stewart, Gordon and Crane, Gregory and Babeu, Alison},
	title = {A New Generation of Textual Corpora: Mining Corpora from Very Large Collections},
	booktitle = {Proceedings of the 7th {ACM/IEEE-CS} Joint Conference on Digital Libraries},
	series = {{JCDL} '07},
	year = {2007},
	pages = {356--365},
	publisher = {ACM},
	address = {New York, NY, USA},
	isbn = {978-1-59593-644-8},
	doi = {10.1145/1255175.1255247},
	url = {https://doi.org/10.1145/1255175.1255247},
	keywords = {ancient greek, text alignment, {OCR} evaluation},
	abstract = {This article considers OCR programs developed for reading classical Greek. The authors of this paper work to show how computational correction practices can create text documents with accuracy ratings comparable to ``hand-crafted corpora.'' Three challenges of Greek OCR documents are identified: exclusion of variant readings, exclusion of multiple editions, and inability to draw connections between texts that reference each other. The authors point to work done, individually, in all of these areas by citing different digital archives and versioning software. However, these three challenges are not addressed by a single project. In order to grapple with this, the authors structure a multi-tiered approach to OCRing Greek texts. The authors discovered that simple error correction techniques based on word lists and a morphological analyzer improve results, and that including multiple editions increases accuracy. In closing, a gesture is made towards trajectories of future work, including image quality, comparison errors, and recognizing accents.},
	internal-note = {00017},
}
